資料下載與匯入

  • 先確認您的電腦安裝了RR studio

  • 將資料下載下來,儲存到自己的工作空間。請點此。因Mac與Windows預設編碼不同,請下載屬於自己的檔案

  • 打開R studio,建立一個新專案,指定路徑到你的工作空間

  • 載入資料
  • 建議把游標移到" “裡面按Tab鍵選取檔案
data_EPA <- read.csv("環保署列管污染農地.csv")
data_TARI <- read.csv("農地重金屬超標未列管.csv")

PART1–環保署資料 (2016) 實作

1.使用dplyr 套件重現敘述統計表

# install.packages("dplyr")
library(dplyr)
#因為沒有全部要用,擷取需要用到的資訊,並另外命名為英文
data_EPA <- data_EPA %>% select(county=1, coordinate=3, area=7, control_date=8, free_date=10)

summarise_data <- data_EPA %>% group_by(county) %>% 
  summarise(count = n(), 
            sum_area = sum(area), 
            control_area = sum(area[free_date=="無"]), 
            free_area = sum(area[free_date!="無"]), 
            count_control = sum(free_date=="無"), 
            count_free = sum(free_date!="無")) %>% 
  mutate(avg_area = sum_area/count, 
         ratio_control = sprintf("%1.0f%%",count_control/count*100),
         ratio_free = sprintf("%1.0f%%",count_free/count*100))

2.計算平均列管月份

#把列管時間轉成日期格式
data_EPA$control_date <- data_EPA$control_date %>% as.Date()
data_EPA$free_date <- data_EPA$free_date %>% as.Date()
#把尚未解除列管的時間帶資料收集截止時間"2016-12-18",以便計算列管時間
data_EPA$free_date <- replace(data_EPA$free_date,which(is.na(data_EPA$free_date)),as.Date("2016-12-18"))

#計算列管月份
data_EPA$totol_month <- NA
for(i in 1:nrow(data_EPA)){
  tryCatch({ #因為資料裡有其中一筆沒有列管時間,我們加tryCatch讓他忽略錯誤,讓迴圈可以正常執行完
  data_EPA$totol_month[i] <- 
    length(seq(from=data_EPA$control_date[i], to=data_EPA$free_date[i], by='month')) 
  }, error=function(e){})
}

#計算各縣市列管中與解除列管的平均列管月份
avg_month <- data_EPA %>% group_by(county) %>% 
  summarise(control_month = mean(totol_month[free_date=="2016-12-18"]) %>% round(), 
            free_month = mean(totol_month[free_date!="2016-12-18"],na.rm = TRUE)%>% 
              round()) #因為資料中有一筆沒有列管時間,因此把參數na.rm改為TRUE,才可以運算

#把無列管中的平均月份帶0
avg_month[is.na(avg_month)] <- 0

#合併回summarise_data
summarise_data <- summarise_data %>% left_join(avg_month, by="county")

3.計算全國加總

tmp <- colSums(summarise_data[,2:8]) %>% t() %>% as.data.frame() %>% 
  mutate(ratio_control = sprintf("%1.0f%%",count_control/count*100),
         ratio_free = sprintf("%1.0f%%",count_free/count*100)) %>% 
  cbind(.,colMeans(summarise_data[,11:12])%>% round() %>% t() %>% as.data.frame()) %>%
  mutate(county="全國") %>% 
  select(12,1:11)
summarise_data <- rbind(tmp, summarise_data)
county count sum_area control_area free_area count_control count_free avg_area ratio_control ratio_free control_month free_month
全國 5485 9263582.55 3576260.23 5687322.33 2468 3017 51145.5310 45% 55% 40 35
宜蘭縣 5 11798.15 3156.64 8641.51 1 4 2359.6300 20% 80% 98 22
南投縣 11 4046.00 2747.00 1299.00 4 7 367.8182 36% 64% 167 103
屏東縣 3 75150.81 1235.16 73915.65 1 2 25050.2700 33% 67% 6 22
苗栗縣 37 43766.63 5729.23 38037.40 5 32 1182.8819 14% 86% 12 27
桃園市 1890 2511211.33 1216032.30 1295179.03 1060 830 1328.6832 56% 44% 48 42
高雄市 49 84948.44 0.00 84948.44 0 49 1733.6416 0% 100% 0 32
雲林縣 25 58230.54 4329.00 53901.54 2 23 2329.2216 8% 92% 78 31
新北市 13 37235.00 0.00 37235.00 0 13 2864.2308 0% 100% 0 22
新竹市 202 362010.73 2746.50 359264.23 2 200 1792.1323 1% 99% 7 27
嘉義市 19 46035.64 12938.00 33097.64 5 14 2422.9284 26% 74% 28 49
嘉義縣 2 4740.96 0.00 4740.96 0 2 2370.4800 0% 100% 0 16
彰化縣 2503 5038536.34 2282395.32 2756141.02 1340 1163 2012.9989 54% 46% 25 28
臺中市 600 742225.90 10292.02 731933.88 11 589 1237.0432 2% 98% 32 30
臺北市 22 48852.15 0.00 48852.15 0 22 2220.5523 0% 100% 0 37
臺南市 104 194793.93 34659.06 160134.87 37 67 1873.0186 36% 64% 105 30

4.使用plotly 套件繪製統計圖表

  • Bar Chart
# install.packages("plotly")
library(plotly)

plot_ly( summarise_data, x = ~county, y = ~count_control, type = "bar") %>% 
  layout(title = "所有案件",
         xaxis = list(title = 'county'),
         yaxis = list(title = 'count'),
         width = 750, height = 400)
  • Grouped Bar Chart
  • with Hover Text and Rotated Labels
plot_ly(summarise_data, x = ~county, y = ~count_control, type = 'bar',
        name = '列管案件', text = ~ratio_control) %>%
  add_trace(y = ~count_free, name = '解除列管案件', text = ~ratio_free) %>%
  layout(xaxis = list(title = "", tickangle = -45), 
         yaxis = list(title = 'count'), barmode = 'group',
         width = 800, height = 400)
  • Pie Chart
#列管縣市太多,很多面積很小,取場址面積大於中位數的來畫圖
summarise_data1 <- summarise_data %>% arrange(sum_area %>% desc) %>% filter(sum_area > median(sum_area))
plot_ly(summarise_data1, labels = ~county, values = ~sum_area, type = 'pie',
        textinfo = 'label+percent') %>% layout(title = '場址面積總和',  width = 800, height = 500,
                                               margin=list(l = 100, r = 50, b = 100,t = 100,pad = 4))

5.場址座標轉經緯度

  • 將經緯度資訊從TWD07轉為WGS84

  • 先下載轉碼程式包並執行

  • 把經緯度資訊抓出來,分割成x軸與y軸

head(data_EPA,3) #可以看到座標欄位裡面有:和,
##   county            coordinate area control_date  free_date totol_month
## 1 臺北市 X:301277,Y:2777431  387   2002-10-04 2004-11-02          25
## 2 臺北市 X:301267,Y:2777372 7738   2002-10-04 2004-11-02          25
## 3 臺北市 X:301310,Y:2777998  580   2004-01-27 2005-03-01          14
TWD97 <- data_EPA$coordinate %>% as.character() %>% 
  stringr::str_split(.,'[,:]',simplify = TRUE) %>% 
  as.data.frame(., stringsAsFactors=FALSE) %>% select(x=2,y=4) 
  • 利用轉碼函式將TWD97座標轉成經緯度座標系統
WGS84 <- TWD97TM2toWGS84(TWD97$x, TWD97$y) %>% as.data.frame()

#合併回原本的data
data_EPA$coordinate <- NULL
data_EPA <- cbind(data_EPA,TWD97,WGS84)
head(data_EPA)
##   county    area control_date  free_date totol_month      x       y
## 1 臺北市  387.00   2002-10-04 2004-11-02          25 301277 2777431
## 2 臺北市 7738.00   2002-10-04 2004-11-02          25 301267 2777372
## 3 臺北市  580.00   2004-01-27 2005-03-01          14 301310 2777998
## 4 臺北市 3205.65   2004-12-10 2008-04-18          41 299512 2779782
## 5 臺北市 2855.14   2004-12-10 2008-04-18          41 299518 2779865
## 6 臺北市 3513.00   2004-12-10 2008-04-18          41 299669 2779961
##        lat      lon
## 1 25.10434 121.5084
## 2 25.10381 121.5083
## 3 25.10946 121.5088
## 4 25.12562 121.4910
## 5 25.12637 121.4911
## 6 25.12723 121.4926

PART2–農試所資料實作

1.重金屬轉換公式

data_TARI <- data_TARI %>% mutate(CUAR= 2.035*CU + 11.884,
                     CUAR_OVER = ifelse(CUAR>200,1,0),
                     ZNAR = 2.487*ZN + 89.711,
                     ZNAR_OVER = ifelse(ZNAR>600,1,0),
                     CDAR = 1.4578 *CD + 0.0323,
                     CDAR_OVER = ifelse(CDAR>5,1,0),
                     CRAR = 17.35 * CR+ 31.91,
                     CRAR_OVER = ifelse(CRAR>250,1,0),
                     NIAR = 5.13 * NI+ 14.56,
                     NIAR_OVER = ifelse(NIAR>200,1,0),
                     PBAR = 2.811 *PB+ 6.715,
                     PBAR_OVER = ifelse(PBAR>500,1,0),
                     OVER = ifelse(CUAR_OVER+ZNAR_OVER+CDAR_OVER+CRAR_OVER+
                                     NIAR_OVER+PBAR_OVER>=1,1,0))

2.計算農地樣區是否有超標情形

data_EPA_monitor<- data_EPA%>% filter(free_date=="2016-12-18")
for(i in 1:nrow(data_TARI)){
  data_TARI$mindistance[i]= min(na.exclude(sqrt((data_TARI$XLO[i]-as.integer(data_EPA_monitor$x))^2+(data_TARI$YLO[i]-as.integer(data_EPA_monitor$y))^2)))
}

for(i in 1:nrow(data_TARI)){
  data_TARI$minover[i] <- sum(na.exclude(sqrt((data_TARI$XLO[i]-data_TARI$XLO)^2+(data_TARI$YLO[i]-data_TARI$YLO)^2))<=1000)
}

data_TARI <- data_TARI %>% mutate(Is_monitored=ifelse(mindistance<=200,1,0))

3.與環保署的資料合併,計算統計指標

library(scales)
stat_over <- data_TARI %>% group_by(V2) %>%
  summarise(`重金屬超標`=sum(OVER),
            `超標未列管`=sum(ifelse(OVER+Is_monitored==1,1,0))) %>%
  mutate(`超標未列管比例`=percent(`超標未列管`/`重金屬超標`),
         `超標列管比例`=percent(1-`超標未列管`/`重金屬超標`))

PART3–用leaflet畫地圖

地圖一:兩份資料交叉比對

# install.packages("leaflet")
library(leaflet)

#先把要點上圖的資料篩選出來
EPA <- data_EPA %>% filter(free_date=="2016-12-18") %>% select(lat,lon,area)
TARI <- data_TARI %>% select(lat = Y,lon = X, area = AREA)


leaflet() %>% setView(lng=120.58,lat=23.58,zoom=8) %>% 
  addProviderTiles("Esri.WorldImagery") %>%
  addCircles(data = EPA, color = "red",lng = ~lon, lat = ~lat, weight = 1, radius = ~sqrt(area/pi)) %>%
  addCircles(data = TARI, color = "yellow",lng = ~lon, lat = ~lat, weight = 1, radius = ~sqrt(area/pi))

地圖二:用log(mindistance)畫地圖

TARI <- data_TARI %>% select(lat = Y,lon = X, area = AREA ,mindistance) %>% mutate(log_mindistance=log(mindistance))

leaflet() %>% 
  addCircles(data=TARI,lng = ~lon, lat = ~lat, weight = 2,
             color=~colorNumeric(c("#CD0000", "#FFFFFF","#0B752F"),
                                 log_mindistance)(log_mindistance)) %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addLegend(position = 'topleft',
            pal =colorNumeric(c("#CD0000","#FFFFFF","#0B752F"),
                              domain=TARI$log_mindistance),
            values=TARI$log_mindistance,
            title = 'log-mindistance')